# Prevent printing of warnings and such in the HTML
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
library(tidyverse)
library(multitaper)
library(GenomicAlignments)
library(parallel)
This is to be done using the multitaper package https://github.com/krahim/multitaper/.
bam.locs <- dir("../../alignment/hisat2/output", pattern = ".bam", recursive = TRUE, full.names = TRUE)
names(bam.locs) <- bam.locs %>%
str_split(pattern = "/") %>%
map(function(x){x[[6]]}) %>%
str_remove(".bam") %>%
str_replace("AraR", "REL60")
Read the bams in, only keeping + strand reads and excluding ERCCs and tRNAs.
bam.list <- mclapply(bam.locs, function(x){
b1 <- readGAlignments(x)
b2 <- b1[strand(b1) == "+" & grepl("ECB_", seqnames(b1))]
return(b2)
}, mc.cores = 40)
Calculate periodicity, this seems to want to return a graph no matter what.
df.list <- lapply(bam.list, function(x){
# count number of reads at each end point
t1 <- table(factor(end(x), levels = 15:150))
# apply spec function
t2 <- spec.pgram(as.ts(t1))
# get a df of graphable parts, rescale the spec height to 0-1 so it's comparable across samples
t3 <- tibble(period = 1/t2$freq,
spec = t2$spec)
# return said df
return(t3)
})
Bind all those together
df1 <- bind_rows(df.list, .id = "sample") %>%
separate(sample, into = c("rep", "seqtype", "line"), sep = "-") %>%
filter(is.finite(period) & period <= 4) # only retain a small range
df1$line <- gsub("M", "-", df1$line) %>%
gsub("P", "+", .) %>%
gsub("AraR", "REL0", .)
# save it
write_csv(df1, "../../data_frames/table_s2_three_nt_periodicity.csv")
sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.5 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/atlas/libblas.so.3.10.3
## LAPACK: /usr/lib/x86_64-linux-gnu/atlas/liblapack.so.3.10.3
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats4 parallel stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] GenomicAlignments_1.24.0 Rsamtools_2.4.0
## [3] Biostrings_2.56.0 XVector_0.28.0
## [5] SummarizedExperiment_1.18.1 DelayedArray_0.14.0
## [7] matrixStats_0.56.0 Biobase_2.48.0
## [9] GenomicRanges_1.40.0 GenomeInfoDb_1.24.2
## [11] IRanges_2.22.2 S4Vectors_0.26.1
## [13] BiocGenerics_0.34.0 multitaper_1.0-14
## [15] forcats_0.5.0 stringr_1.4.0
## [17] dplyr_1.0.1 purrr_0.3.4
## [19] readr_1.3.1 tidyr_1.1.0
## [21] tibble_3.0.3 ggplot2_3.3.2
## [23] tidyverse_1.3.0
##
## loaded via a namespace (and not attached):
## [1] httr_1.4.1 jsonlite_1.7.0 modelr_0.1.8
## [4] assertthat_0.2.1 blob_1.2.1 GenomeInfoDbData_1.2.3
## [7] cellranger_1.1.0 yaml_2.2.1 pillar_1.4.6
## [10] backports_1.1.8 lattice_0.20-41 glue_1.4.1
## [13] digest_0.6.25 rvest_0.3.5 colorspace_1.4-1
## [16] htmltools_0.5.0 Matrix_1.2-18 pkgconfig_2.0.3
## [19] broom_0.5.6 haven_2.3.1 zlibbioc_1.34.0
## [22] scales_1.1.1 BiocParallel_1.22.0 generics_0.0.2
## [25] ellipsis_0.3.1 withr_2.2.0 cli_2.0.2
## [28] magrittr_1.5 crayon_1.3.4 readxl_1.3.1
## [31] evaluate_0.14 fs_1.4.2 fansi_0.4.1
## [34] nlme_3.1-149 xml2_1.3.2 tools_4.0.3
## [37] hms_0.5.3 lifecycle_0.2.0 munsell_0.5.0
## [40] reprex_0.3.0 compiler_4.0.3 rlang_0.4.7
## [43] grid_4.0.3 RCurl_1.98-1.2 rstudioapi_0.11
## [46] bitops_1.0-6 rmarkdown_2.3 gtable_0.3.0
## [49] DBI_1.1.0 R6_2.4.1 lubridate_1.7.9
## [52] knitr_1.29 stringi_1.4.6 Rcpp_1.0.5
## [55] vctrs_0.3.2 dbplyr_1.4.4 tidyselect_1.1.0
## [58] xfun_0.16